#required packages
packages <- c('ggplot2', 'plyr', 'jsonlite', 'lme4', 'lsmeans', 'grid', 'gridExtra', 'png')

#attach them all (lapply prints the search path after each attach when knitted)
lapply(packages, library, character.only = TRUE)
## Loading required package: Matrix
## Loading required package: estimability
## [[1]]
## [1] "ggplot2"   "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
## [1] "plyr"      "ggplot2"   "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "jsonlite"  "plyr"      "ggplot2"   "stats"     "graphics" 
##  [6] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "lme4"      "Matrix"    "jsonlite"  "plyr"      "ggplot2"  
##  [6] "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [11] "methods"   "base"     
## 
## [[5]]
##  [1] "lsmeans"      "estimability" "lme4"         "Matrix"      
##  [5] "jsonlite"     "plyr"         "ggplot2"      "stats"       
##  [9] "graphics"     "grDevices"    "utils"        "datasets"    
## [13] "methods"      "base"        
## 
## [[6]]
##  [1] "grid"         "lsmeans"      "estimability" "lme4"        
##  [5] "Matrix"       "jsonlite"     "plyr"         "ggplot2"     
##  [9] "stats"        "graphics"     "grDevices"    "utils"       
## [13] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "gridExtra"    "grid"         "lsmeans"      "estimability"
##  [5] "lme4"         "Matrix"       "jsonlite"     "plyr"        
##  [9] "ggplot2"      "stats"        "graphics"     "grDevices"   
## [13] "utils"        "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "png"          "gridExtra"    "grid"         "lsmeans"     
##  [5] "estimability" "lme4"         "Matrix"       "jsonlite"    
##  [9] "plyr"         "ggplot2"      "stats"        "graphics"    
## [13] "grDevices"    "utils"        "datasets"     "methods"     
## [17] "base"
#path of the folder where the per-participant json files live
foldername<-"/home/hanshalbe/bamcp/"

#difference function, we'll need this to get the pay-off per move as they've been saved
#cumulatively: returns c(x[1], x[2]-x[1], ..., x[n]-x[n-1]).
#Vectorized with diff(); also safe for length-0 and length-1 input, where the
#old `for (i in 2:length(x))` loop ran backwards (2:1, 2:0) and errored.
mydiff<-function(x){
  if (length(x) == 0) return(x)
  c(x[1], diff(x))
}

#standard error of the mean (sd / sqrt(n)); used for the error bars
se <- function(x) {
  sd(x) / sqrt(length(x))
}

Score over blocks

The easiest thing to analyze are the scores over blocks, so let’s start with that…

#168 participants in total
idnum<-168

#getting the right format for the way the json's are named
#(zero-padded ids, e.g. "001", matching the file names)
format<-formatC(format="d", 1:idnum, flag="0", width=ceiling(log10(max(idnum))))

#initialize empty data frame: one row per participant x block
dat<-data.frame(id=numeric(), condition=numeric(), block=numeric(), payoff=numeric())

#loop through the json files
for (i in 1:idnum){
  #get individual json
  myjson<-fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  #payoff per block: allPayoffCounts is stored cumulatively, so take first
  #differences; entries 1-5 are the open maps
  payoff<-mydiff(as.numeric(unlist(myjson$allPayoffCounts)))[1:5]
  #condition
  condition<-rep(myjson$condition, 5)
  #block number
  block<-1:5
  #participant's id
  id<-rep(i, 5)
  #dummy frame
  dummy<-data.frame(id, condition, block, payoff)
  #attach to main frame (note: growing a data frame with rbind in a loop is
  #O(n^2); fine at this size, but bind once at the end for larger data)
  dat<-rbind(dat, dummy)
}
#recode conditions: 1 & 2 -> "1-2" group, 3 & 4 -> "0.5-0.5" group
dat$cond<-mapvalues(dat$condition, 1:4, c("1-2", "1-2", "0.5-0.5", "0.5-0.5"))
#create a frame for plotting with means and standard errors per block and condition
dplot<-ddply(dat, ~cond+block, summarize, mu=mean(payoff), se=se(payoff))
#change condition to factor for plotting and type consistency
dplot$cond<-as.factor(dplot$cond)
#set limits
limits <- aes(ymax = mu + se, ymin=mu - se)

#dodge bars a little
pd <- position_dodge(.2)

#plot the means over blocks and condition
p<-ggplot(dplot, aes(x=block, y=mu, col=cond)) +
  #error bars
  geom_errorbar(aes(ymin=mu-se, ymax=mu+se), width=0.2, size=1, position=pd) +
  #lines
  geom_line(position=pd, size=1) +
  #classic theme, legend on bottom
  theme_classic()+theme(text = element_text(size=22,  family="serif"), legend.position="bottom")+
  ylab("Mean outcome")+xlab("Block")+
  guides(col=guide_legend(title="Condition (beta distribution)"))+
  #blank background/grid, black axes and panel border
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=2))
#show plot
print(p)

We can already see that the average score seems to be higher for the 0.5-0.5 group than for the 1-2 group. Additionally, scores go up over blocks in general. Thus, participants seem to get better over time in this task. This is nice as they seem to learn as time passes.

Let’s test if this is significant though.

#mixed effects model: fixed effects of block and condition,
#random intercept and random block slope per participant
m<-lmer(payoff~block+cond+(block|id), data=dat)
#summary, this is where we look at the fixed effects
summary(m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: payoff ~ block + cond + (block | id)
##    Data: dat
## 
## REML criterion at convergence: -1472.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6345 -0.6558 -0.0587  0.6000  4.5608 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr 
##  id       (Intercept) 0.0016249 0.04031       
##           block       0.0001611 0.01269  -0.74
##  Residual             0.0088469 0.09406       
## Number of obs: 840, groups:  id, 168
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)  0.276125   0.009106  30.325
## block        0.008570   0.002495   3.435
## cond1-2     -0.027153   0.007827  -3.469
## 
## Correlation of Fixed Effects:
##         (Intr) block 
## block   -0.794       
## cond1-2 -0.430  0.000

This cements the intuition derived from the previous plot statistically. Notice however that the scores for the two conditions are expected to be different a priori, i.e. they don’t have the same mean even if movements were performed at random.

Do the conditions cause different scores in test maps?

Let’s check out if participants learn so differently in the different conditions that this even influences their scores in the test maps.

#initialize empty data frame: one row per participant x test map
dat<-data.frame(id=numeric(), condition=numeric(), payoff=numeric())
#loop through participants
for (i in 1:idnum){
  #get in json
  myjson<-fromJSON(paste0(foldername ,"ppt", format[i], ".json"))
  #get the scores by calculating the differences of the cumulative counts;
  #entries 6-13 are the 8 test maps
  payoff<-mydiff(as.numeric(unlist(myjson$allPayoffCounts)))[6:13]
  #conditions
  condition<-rep(myjson$condition, 8)
  #running id number
  id<-rep(i, 8)
  #data frame
  dummy<-data.frame(id, condition, payoff)
  #bind them together
  dat<-rbind(dat, dummy)
}
#recode condition names (1 & 2 -> "1-2", 3 & 4 -> "0.5-0.5")
dat$cond<-mapvalues(dat$condition, 1:4, c("1-2", "1-2", "0.5-0.5", "0.5-0.5"))

#a data frame for plotting: mean and standard error of test-map payoff per condition
dplot<-ddply(dat, ~cond,summarize, mean=mean(payoff), se=se(payoff))
#limits (mean +/- one standard error)
limits <- aes(ymax = mean + se, ymin=mean - se)

#do the plot
p <- ggplot(dplot, aes(y=mean, x=cond, fill=cond)) + 
  #bars
  geom_bar(position="dodge", stat="identity")+
  #error bars
  geom_errorbar(limits, position="dodge", width=0.31)+
  #point size
  geom_point(size=3)+
  #labs
  xlab("Condition")+
  #classic theme
  theme_classic() +
  #adjust text size
  theme(text = element_text(size=22, family="serif"))+
  #blank background/grid, black axes and panel border
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=2))

print(p)

Even though there is a difference in performance during the open maps, there is no difference between the two groups in the test maps. I think this is good news as both groups seem to understand what to do in the testmaps (more on this later).

Dwelling times

One of the main predictions from the BAMCP-paper is the dwelling time of the algorithm between the different conditions. Dwelling is defined as staying in the same row or column for a longer time. Thus, we should check if this difference occurs within the open maps between conditions.

#a function to keep track of dwelling: counts the number of "runs" of
#identical consecutive moves, i.e. it is 1 for a constant sequence and
#increments every time the movement direction changes.
#Vectorized; also handles the empty (returns 0) and length-1 (returns 1)
#cases, on which the old `for (i in 2:length(x))` loop errored.
#(The original comment said "initialize at 0" but the code started at 1 —
#starting at 1 is the intended behavior, kept here.)
mydwelling<-function(x){
  if (length(x) == 0) return(0)
  #compare each move to its predecessor and count the changes
  1 + sum(x[-1] != x[-length(x)])
}

#initialize frame: one row per participant x open map
dat<-data.frame(id=numeric(), condition=numeric(), dwell=numeric())

#loop through participants
for (i in 1:idnum){
  #get json
  myjson<-fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  #dwell vector, one entry per open map
  dwell<-rep(0, 5)
  #loop through open maps (blocks 1-5)
  for (k in 1:5){
    #dwelling times: number of direction changes (+1) in this map's movement sequence
    dwell[k]<-mydwelling(unlist(myjson$allMovementTrackers[k]))
  }
  #condition
  condition<-rep(myjson$condition, 5)
  #id number
  id<-rep(i, 5)
  #dummy frame
  dummy<-data.frame(id, condition, dwell)
  #attach
  dat<-rbind(dat, dummy)
}

#recode condition (1 & 2 -> "1-2", 3 & 4 -> "0.5-0.5")
dat$cond<-mapvalues(dat$condition, 1:4, c("1-2", "1-2", "0.5-0.5", "0.5-0.5"))

#data frame for plotting: mean and standard error of dwelling per condition
dplot<-ddply(dat, ~cond,summarize, mean=mean(dwell), se=se(dwell))
#do the plot
#NOTE(review): `limits` below is the aes(ymax = mean + se, ymin = mean - se)
#object defined in the previous test-map section; it still applies because
#this dplot has the same column names, but it is a hidden cross-section
#dependency — redefine it locally if running this section in isolation
p <- ggplot(dplot, aes(y=mean, x=cond, fill=cond)) + 
  #bars
  geom_bar(position="dodge", stat="identity")+
  #error bars
  geom_errorbar(limits, position="dodge", width=0.31)+
  #point size
  geom_point(size=3)+
  #title
  theme_classic() +
  #labs
  xlab("Condition")+ylab("Dwelling time")+
  #adjust text size
  theme(text = element_text(size=22, family="serif"))+
  #adjust theme
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=2))
p

So this seems to go in the right direction, but the difference also looks relatively small. Let’s see if it is significant.

#just a normal regression, I know it's not great, but gives us an upper band of what's possible (kind of)
#(ignores the nesting of maps within participants; compare the lmer approach used above)
m<-lm(dwell~cond, data=dat)
#let's check
summary(m)
## 
## Call:
## lm(formula = dwell ~ cond, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.007 -15.007  -0.102  12.898  58.993 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  33.1024     0.9062  36.530   <2e-16 ***
## cond1-2       1.9048     1.2815   1.486    0.138    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.57 on 838 degrees of freedom
## Multiple R-squared:  0.002629,   Adjusted R-squared:  0.001439 
## F-statistic: 2.209 on 1 and 838 DF,  p-value: 0.1376

Seems like it’s going in the right direction but not significant. The effect of dwelling time is definitely not as big as predicted by BAMCP. I don’t think that’s bad as I don’t really think participants behave as efficiently as a BAMCP here. Also might make it ok for us to drop coding up the full BAMCP-model and instead focus on out-of-the-box tree search algorithms.

Further analyzing the open maps

Somehow the potato-vector (i.e., whether or not a potato was found on a given move) did not always get saved (it is recoverable though, we’re working on it). However, we can still easily look at other things such as the probability of potatoes over time. For this, I will first create and explain some higher level helper functions.

#This gets the location over time within the maps given the movements.
#Returns a 210x210 matrix of visit counts; the walk starts at (106, 106)
#and each cell is counted BEFORE the corresponding move is taken.
getlocation<-function(movement){
  #visit counts over the whole map
  visits <- matrix(0, nrow = 210, ncol = 210)
  #current position, starting in the centre
  r <- 106
  cc <- 106
  for (step in movement) {
    #count the current cell, then take the step
    visits[r, cc] <- visits[r, cc] + 1
    if (step == "up") {
      r <- r + 1
    } else if (step == "down") {
      r <- r - 1
    } else if (step == "right") {
      cc <- cc + 1
    } else if (step == "left") {
      cc <- cc - 1
    }
  }
  return(visits)
}

#this returns the vector of visited probabilities, i.e. the underlying p(potato)
#along the walked trajectory. `movement` is the vector of moves
#("up"/"down"/"left"/"right"); `m` is the 210x210 probability map.
#The probability of the CURRENT cell is recorded before each move is taken;
#the walk starts at (106, 106).
#(The original also built a visit-count matrix that was never read or
#returned — that dead work is removed here; the returned vector is unchanged.)
getprobs<-function(movement, m){
  #column number at start
  col<-106
  #row number at start
  row<-106
  #initialize probability vector
  probs<-rep(0, length(movement))
  for (i in seq_along(movement)){
    #probability of the cell we are standing on
    probs[i]<-m[row, col]
    if (movement[i]=="up"){row<-row+1}
    if (movement[i]=="down"){row<-row-1}
    if (movement[i]=="right"){col<-col+1}
    if (movement[i]=="left"){col<-col-1}
  }
  return(probs)
}

These two functions generate the trajectory along which a participant walked on the map, and the probability vector (i.e., the underlying probabilities) over which the participant moved. So let’s take it from here and do some further plotting.

#initialize data frame: one row per participant x block x trial
dat<-data.frame(prob=numeric(), trial=numeric(), condition=numeric(), block=numeric(), id=numeric())

#loop through the json files
for (i in 1:idnum){
  #get individual json
  myjson<-fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  #initialize the per-participant collectors
  probs<-numeric()
  numb<-numeric()
  for (j in 1:5){
    #underlying p(potato) along the trajectory walked in block j
    p<-getprobs(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
    #BUG FIX: `probs` was initialized but never filled, so the data frame
    #below recycled only block 5's 100 probabilities across all 500 rows;
    #accumulate each block's vector instead
    probs<-c(probs, p)
    #number of trials
    numb<-c(numb, 1:100)
  }
  #dummy frame (500 rows: 5 blocks x 100 trials)
  dummy<-data.frame(prob=probs, trial=numb, condition=rep(myjson$condition, 5), block=rep(1:5, each=100), id=rep(i, 500))
  #bind them together
  dat<-rbind(dat, dummy)
}
#recode conditions (1 & 2 -> "1-2", 3 & 4 -> "0.5-0.5")
dat$cond<-mapvalues(dat$condition, 1:4, c("1-2", "1-2", "0.5-0.5", "0.5-0.5"))
#create a frame for plotting with means and standard errors per trial and condition
dplot<-ddply(dat, ~cond+trial, summarize, mu=mean(prob), se=se(prob))
#change condition to factor for plotting and type consistency
dplot$cond<-as.factor(dplot$cond)
#set limits
limits <- aes(ymax = mu + se, ymin=mu - se)

#plot the means over trials and condition
#(`pd` is the position_dodge object defined in the first plotting section)
p<-ggplot(dplot, aes(x=trial, y=mu, col=cond)) +
  #error bars
  geom_errorbar(aes(ymin=mu-se, ymax=mu+se), width=0.2, size=1, position=pd) +
  #lines
  geom_line(position=pd, size=1) +
  #classic theme, legend on bottom
  theme_classic()+
  theme(text = element_text(size=22,  family="serif"), legend.position="bottom")+
  ylab("Mean probability")+xlab("Trial")+
  guides(col=guide_legend(title="Condition (beta distribution)"))+
  #blank background/grid, black axes and panel border
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=2))
#show plot
print(p)

The plot above shows the mean probabilities over time. It looks fairly noisy, but let’s do a really rough test. Note: you can also further bin the trials, i.e. means for 10 trials of time. This looks smoother but still no real linear trend.

#logistic regression with probability vector
#notice that this will produce a warning but still an efficient estimate, we only need the transform
#NOTE(review): no family is specified, so glm() actually fits a gaussian
#(ordinary linear) model here, not a logistic one — the output's
#"Dispersion parameter for gaussian family" line confirms this; use
#family = binomial (or quasibinomial for proportions) if a logistic fit is intended
m<-glm(prob~trial, data=dat)
#summarize
summary(m)
## 
## Call:
## glm(formula = prob ~ trial, data = dat)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.25286  -0.19698  -0.07872   0.13287   0.76394  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.292e-01  1.667e-03 137.482   <2e-16 ***
## trial       2.369e-04  2.866e-05   8.268   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.05747753)
## 
##     Null deviance: 4831.9  on 83999  degrees of freedom
## Residual deviance: 4828.0  on 83998  degrees of freedom
## AIC: -1548.7
## 
## Number of Fisher Scoring iterations: 2

So the odds of potatoes do go up (yet only slightly) over time. Participants seem to get a little better as trials go by. I guess it’s also ok if this is not too strong in any case, it’s a complicated task and sometimes randomness strikes.

The window of used information

We can expand on the analysis above and try and assess how many steps ahead participants might actually plan. I proposed to do this by assessing the k-steps ahead auto-correlation of the probability vector and a (crude) measure of uncertainty. For this, let me first create some helper functions.

#this returns a vector of a crude estimate of how often a particular
#row-column combination has been explored before: at each step, the average
#of the fraction of past visits falling in the current row and in the
#current column. Walk starts at (106, 106); the current cell is counted
#BEFORE each move.
#(`m` is accepted for interface consistency with getprobs()/randprobs()
#but is not used.)
getexplore<-function(movement, m){
  #create map of visit counts, maintained incrementally
  map<-matrix(0,nrow=210, ncol=210)
  #col-number
  col<-106
  #row-number
  row<-106
  #initialize vector
  exploration<-rep(0, length(movement))
  #loop through
  for (i in seq_along(movement)){
    #keep track of the current cell
    map[row,col]<-map[row,col]+1
    #after this increment, `map` equals getlocation(movement[1:i]) exactly,
    #so we reuse it instead of rebuilding the whole trajectory on every
    #step (removes the O(n^2) recomputation; matches randexplore() below)
    #row exploration
    rs<-rowSums(map)
    #column exploration
    cs<-colSums(map)
    #crude combination of the two
    exploration[i]<-0.5*rs[row]/sum(rs)+0.5*cs[col]/sum(cs)
    #Updates as usual
    if (movement[i]=="up"){row<-row+1}
    if (movement[i]=="down"){row<-row-1}
    if (movement[i]=="right"){col<-col+1}
    if (movement[i]=="left"){col<-col-1}
  }
  return(exploration)
}

#this function returns the probability vector of a random mover and will serve as a baseline
#(same walk bookkeeping as getprobs(), but each step is drawn uniformly at
#random instead of taken from the participant's data; `movement` is only
#used for its length, so the baseline walk has the same number of steps)
randprobs<-function(movement, m){
  #visit counts (kept for symmetry with the other walkers; not returned)
  map<-matrix(0,nrow=210, ncol=210)
  col<-106
  row<-106
  probs<-rep(0, length(movement))
  for (i in seq_along(movement)){
    map[row,col]<-map[row,col]+1
    #record the underlying p(potato) at the current cell
    probs[i]<-m[row, col]
    #random move
    move<-sample(c("up", "down", "left", "right"), 1)
    if (move=="up"){row<-row+1}
    if (move=="down"){row<-row-1}
    if (move=="right"){col<-col+1}
    if (move=="left"){col<-col-1}
  }
  return(probs)
}

#this function returns the exploration of a random mover, again as a baseline
#(same crude exploration measure as getexplore(), but with uniformly random
#steps; `movement` is only used for its length)
randexplore<-function(movement, m){
  #incrementally maintained visit-count map
  map<-matrix(0,nrow=210, ncol=210)
  col<-106
  row<-106
  exploration<-rep(0, length(movement))
  for (i in seq_along(movement)){
    map[row,col]<-map[row,col]+1
    #fraction of past visits in the current row / current column, averaged
    rs<-rowSums(map)
    cs<-colSums(map)
    exploration[i]<-0.5*rs[row]/sum(rs)+0.5*cs[col]/sum(cs)
    #random move
    move<-sample(c("up", "down", "left", "right"), 1)
    if (move=="up"){row<-row+1}
    if (move=="down"){row<-row-1}
    if (move=="right"){col<-col+1}
    if (move=="left"){col<-col-1}
  }
  return(exploration)
}

I can use these functions now to compare the lag of significant auto-correlations within the vectors created by participants and a random mover. If there is a significant auto-correlation that is bigger than chance and goes k steps back, then this means that, for example, rewards that are k steps away influence present moves.

#initialize vectors for the four ACF series plus the lag index
correlation<-randcor<-exploration<-randexp<-numb<-numeric()
#loop through participants and open maps, collecting lag 1-10 auto-correlations
for (i in 1:idnum){
  myjson<-fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  for (j in 1:5){
    #probability vector
    p<-getprobs(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
    #random probability vector
    prand<-randprobs(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
    #exploration vector
    e<-getexplore(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
    #random exploration vector
    erand<-randexplore(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
    #correlation functions, lags 1-10 (acf element 1 is lag 0)
    #NOTE(review): the empirical vectors are reversed (rev()) before acf()
    #while the random baselines are not — confirm the reversal is intentional
    correlation<-c(correlation, acf(rev(p), plot=FALSE)$acf[2:11])
    exploration<-c(exploration, acf(rev(e), plot=FALSE)$acf[2:11])
    randcor<-c(randcor, acf(prand, plot=FALSE)$acf[2:11])
    randexp<-c(randexp, acf(erand, plot=FALSE)$acf[2:11])
    #lag numbers
    numb<-c(numb, 1:10)
  }
}

#data frame of all auto-correlations per lag
d<-data.frame(cor=correlation, rcor=randcor, exp=exploration, rexp=randexp, num=numb)
#mean ACF per lag
#BUG FIX: the original summarized mean(randcor) and mean(randexp), which are
#the full GLOBAL vectors, not columns of d (the columns are rcor/rexp) —
#ddply fell back to the calling environment and returned one overall mean
#repeated for every lag. Use the columns of d instead.
dplot<-ddply(d, ~num, summarize, cor=mean(cor), rcor=mean(rcor), rex=mean(exp), randex=mean(rexp))
#restack into long format for the faceted plot below
dplot<-data.frame(horizon=rep(1:10, 4), 
                  correlation=c(dplot$cor, dplot$rcor, dplot$rex, dplot$randex), 
                  Method=rep(rep(c("Empirical", "Random"), each=10), 2),
                  Value=rep(c("Expectation", "Certainty"), each=20))

#horizon smaller than 6 seems enough
dplot<-subset(dplot, horizon<6)

#do the plot: mean ACF by horizon, faceted into expectation vs certainty,
#empirical vs random baseline as colored lines
p <- ggplot(dplot, aes(x=horizon, y=correlation, col=Method)) + 
  #lines
  geom_line(stat="identity")+
  #fix y range
  ylim(c(0,0.8)) + 
  #points on top of the lines
  geom_point(size=1)+
  #facet the plot
  facet_wrap(~Value, scales = "free_y")+
  #theme and titles
  theme_classic() +xlab("Horizon")+ylab("ACF")+
  ggtitle("Planning horizon")+
  theme(text = element_text(size=20, family="serif"),
        strip.background = element_blank(),
        plot.title = element_text(size=20),
        legend.position = "bottom")

print(p)

So this seems to suggest that a horizon of about k=3-4 affects participants’ moves. However, we will have to assess this by using the tree search algorithms later on. Another possibility could be to do the same thing with the empirical probs (will need the potato vector etc.), the empirical uncertainty and a UCB vector. This could then also be done with arima-x models actually.

Test maps:

Let’s focus on the test maps. We want to know if the obstacles (restricted vs. unrestricted) and the information (high vs. low) influenced participants behavior.

Our hypotheses were: A: more information leads to better planning and thus to better scores and B: More physical restrictions in the map lead to better planning and thus to better scores. Let’s first recode the data as it’s saved in a slightly complicated manner.

#initialize frame: one row per participant x test map
dat<-data.frame(id=numeric(), map=numeric(), info=numeric(), outcome=numeric(), trap=numeric())
#loop through
for (i in 1:idnum){
  myjson<-fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  maps<-numeric()
  infos<-numeric()
  trap<-numeric()
  #test maps live at list positions 6-13 (5+j)
  for (j in 1:8){
    #outer product to generate matrix
    #NOTE(review): rows x cols here, whereas the open-map sections used
    #outer(cols, rows) — confirm which orientation matches the stored maps
    m<-outer(myjson$allRowParameters[[5+j]] ,myjson$allColParameters[[5+j]])
    #fake central movement matrix: direction labels for the four cells
    #adjacent to the starting cell
    mt<-t(matrix(c("no","up" ,"no", "left", "no", "right", "no", "down", "no"), nrow=3, ncol=3))
    #where is the bait: the 0.8-probability cell in the central 3x3 window
    mt<-mt[m[11:13,11:13]==0.8]
    #where from the center is it?
    bait<-mt[mt!="no"]
    #high or low info, roughly assessed but seems to work
    info<-ifelse(sum(myjson$allExploredRows[[5+j]])+sum(myjson$allExploredCols[[5+j]])>200, "high", "low")
    #these are the conditionals as set up by moritz
    if (bait=="right" & info=="high"){map<-"map1"}
    if (bait=="left" & info=="low"){map<-"map2"}
    if (bait=="left" & info=="high"){map<-"map3"}
    if (bait=="right" & info=="low"){map<-"map4"}
    if (bait=="down" & info=="high"){map<-"map5"}
    if (bait=="up" & info=="low"){map<-"map6"}
    if (bait=="up" & info=="high"){map<-"map7"}
    if (bait=="down" & info=="low"){map<-"map8"}
    #did they go for the trap, i.e. was the very first move towards the bait
    trap<-c(trap, myjson$allMovementTrackers[[5+j]][1]==bait)
    #what map was it
    maps<-c(maps, map)
    #info
    infos<-c(infos,info)
  }
  #id
  id<-rep(i, 8)
  #total potato counts for the 8 test maps
  outcome<-as.numeric(unlist(myjson$allPotatoCounts[6:13]))
  #dummy frame
  dummy<-data.frame(id=id, map=maps, info=infos, outcome=outcome, trap=trap)
  #bind them
  dat<-rbind(dat, dummy)
}

#restricted as by setup: maps 1, 2, 5, 6 restricted; maps 3, 4, 7, 8 unrestricted
dat$res<-mapvalues(dat$map, paste0("map", 1:8), c("restricted", "restricted", "unrestricted","unrestricted","restricted","restricted","unrestricted","unrestricted" )) 

Now we can use this data frame to test if restriction and information influenced the score in the test maps.

#linear model: do information level and map restriction predict test-map score?
m1<-lm(outcome~info+res, data=dat)
summary(m1)
## 
## Call:
## lm(formula = outcome ~ info + res, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7932 -1.2187  0.2068  1.2068  4.7812 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.79315    0.07612  49.831  < 2e-16 ***
## infolow         -0.19643    0.08790  -2.235   0.0256 *  
## resunrestricted -0.37798    0.08790  -4.300 1.83e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.611 on 1341 degrees of freedom
## Multiple R-squared:  0.01721,    Adjusted R-squared:  0.01575 
## F-statistic: 11.74 on 2 and 1341 DF,  p-value: 8.791e-06

Cool! Low information and less restrictions lead to lower scores. Thus, these two hypotheses can be confirmed.

Let’s check if these variables also influence the probability to go for the bait.

#logistic regression: do information and restriction predict going for the bait?
m2<-glm(trap~info+res, data=dat, family="binomial")
summary(m2)
## 
## Call:
## glm(formula = trap ~ info + res, family = "binomial", data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1914  -1.0394  -0.8844   1.3219   1.5020  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.33356    0.09616  -3.469 0.000523 ***
## infolow          0.36644    0.11180   3.278 0.001047 ** 
## resunrestricted -0.40328    0.11181  -3.607 0.000310 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1823.6  on 1343  degrees of freedom
## Residual deviance: 1799.9  on 1341  degrees of freedom
## AIC: 1805.9
## 
## Number of Fisher Scoring iterations: 4

Less so. Seems like fewer restrictions make it less likely to go for the bait. I think this is quite intuitive as there are simply more options with fewer restrictions, i.e. more directions to go.

#remove what we've stored so far (this is just to avoid further clashes and save memory)
#NOTE(review): rm(list=ls()) wipes the whole workspace — everything defined
#above (including the helper functions) is gone from here on, so the rest of
#the script is self-contained and redefines what it needs
rm(list=ls())

#this function again: visit-count matrix for the 23x23 test maps,
#starting from the centre cell (12, 12); each cell is counted before
#the corresponding move is taken
getlocation<-function(movement){
  visits <- matrix(0, nrow = 23, ncol = 23)
  r <- 12
  cc <- 12
  for (step in movement) {
    #count the current cell, then take the step
    visits[r, cc] <- visits[r, cc] + 1
    if (step == "up") {
      r <- r + 1
    } else if (step == "down") {
      r <- r - 1
    } else if (step == "right") {
      cc <- cc + 1
    } else if (step == "left") {
      cc <- cc - 1
    }
  }
  return(visits)
}


#participant ids and the zero-padded format used in the json file names
x<-1:168
format<-formatC(format="d",x,flag="0",width=ceiling(log10(max(x))))

#a list of all test maps: one aggregate 23x23 visit-count matrix per map
m<-rep(list(matrix(0, nrow=23, ncol=23)), 8)

#loop through participants, accumulating visit counts per test map
for (i in 1:168){
  #json
  myjson<-fromJSON(paste0("/home/hanshalbe/bamcp/ppt", format[i], ".json"))
  #loop through test maps (stored at list positions 6-13, i.e. 5+j)
  for (j in 1:8){
    #get target
    #NOTE(review): rows x cols, same orientation as the previous test-map section
    ma<-outer(myjson$allRowParameters[[5+j]] ,myjson$allColParameters[[5+j]])
    #check where the bait is (the 0.8-probability cell next to the start)
    mt<-t(matrix(c("no","up" ,"no", "left", "no", "right", "no", "down", "no"), nrow=3, ncol=3))
    mt<-mt[ma[11:13,11:13]==0.8]
    bait<-mt[mt!="no"]
    #check info
    info<-ifelse(sum(myjson$allExploredRows[[5+j]])+sum(myjson$allExploredCols[[5+j]])>200, "high", "low")
    #recode and add counts if map identified (same map coding as before)
    if (bait=="right" & info=="high"){m[[1]]<-m[[1]]+getlocation(myjson$allMovementTrackers[[5+j]])}
    if (bait=="left" & info=="low"){m[[2]]<-m[[2]]+getlocation(myjson$allMovementTrackers[[5+j]])}
    if (bait=="left" & info=="high"){m[[3]]<-m[[3]]+getlocation(myjson$allMovementTrackers[[5+j]])}
    if (bait=="right" & info=="low"){m[[4]]<-m[[4]]+getlocation(myjson$allMovementTrackers[[5+j]])}
    if (bait=="down" & info=="high"){m[[5]]<-m[[5]]+getlocation(myjson$allMovementTrackers[[5+j]])}
    if (bait=="up" & info=="low"){m[[6]]<-m[[6]]+getlocation(myjson$allMovementTrackers[[5+j]])}
    if (bait=="up" & info=="high"){m[[7]]<-m[[7]]+getlocation(myjson$allMovementTrackers[[5+j]])}
    if (bait=="down" & info=="low"){m[[8]]<-m[[8]]+getlocation(myjson$allMovementTrackers[[5+j]])}
  }
}

#let's create a plotting function to avoid having to do the same comment 8 times
#takes in the number of the testmap k and a title; overlays the aggregate
#visit-count heatmap (central 7x7 cells around the start) on the map's png screenshot
myplot<-function(k, title){
  #get png image, this is linked to where I've stored them
  img <- readPNG(paste0("/home/hanshalbe/Documents/newimage", k, ".png"))
  #get a raster with interpolations for this image
  g <- rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc"))
  #get the correct counts as matrix (central 7x7 window)
  map<-m[[k]][9:15, 9:15]
  #make a data frame of it for ggplot (coordinates relative to the start cell)
  dp<-data.frame(x=rep(-3:3 ,each=7), y=rep(-3:3, 7), count=as.vector(map))
  #the max is always the starting point, we won't color it as this would be misleading
  dp$count<-ifelse(dp$count==max(dp$count), NA, dp$count)
  #everything with 0 counts won't be plotted
  dp$count<-ifelse(dp$count==0, NA, dp$count)
  #start to plot with fills determined by counts
  p  <- ggplot(dp, aes(x = x, y = y, fill = count))+
    #go all over the plot
    annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)+
    #lower alpha of heatmap such that png is still recognizable
    geom_tile(alpha=0.4)+ theme_bw()+
    #palette
    scale_fill_distiller(palette = "Spectral")+
    #expand over whole plot
    scale_x_continuous(expand = c(0, 0))+ 
    #same for y — BUG FIX: y is numeric (-3:3), so it needs the continuous
    #scale; the original used scale_y_discrete, which mishandles a
    #continuous axis (x already used the continuous scale)
    scale_y_continuous(expand = c(0, 0))+ guides(fill=FALSE)+
    #hide the axes entirely
    theme(strip.background = element_blank(),
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+ggtitle(title)
  #return the plot
  return(p)
}

The function above enables us to produce the heatmaps and overlay them over the testmaps. This will provide the same info as the tests before but in a nicer visual way. Thus, I will close with those plots here.

#heatmaps for all eight test maps; titles give the restriction x information cell
myplot(1, "Restricted space, high information")

myplot(2, "Restricted space, low information")

myplot(3, "Unrestricted space, high information")

myplot(4, "Unrestricted space, low information")

myplot(5, "Restricted space, high information")

myplot(6, "Restricted space, low information")

myplot(7, "Unrestricted space, high information")

myplot(8, "Unrestricted space, low information")

To-Do